; Flat binary assembled for load offset 100h (presumably a DOS .COM program;
; the code ends with a near `ret` and relies on entry-time register values).
org 100h

; Active configuration.
; XRES/YRES bound the pixel loop; YRES is also stored in C_YRES and used to
; normalise both coordinates. VESA_MODE is passed in bx to int 10h/ax=4f02h
; and must describe an XRES x YRES mode with 16 bits per pixel, because the
; pixel loop stores one word per pixel.
%define XRES 320
%define YRES 200
%define VESA_MODE 0x10e

; Alternative configuration (disabled).
;%define XRES 640
;%define YRES 480
;%define VESA_MODE 0x111

; Alternative configuration (disabled).
;%define XRES 1024
;%define YRES 768
;%define VESA_MODE 0x117

  ; Point the data segment registers at three 64 KB lookup tables and at the
  ; video window. Because ds no longer addresses the program's own segment
  ; after this, all later accesses to the program's variables use bp-based
  ; addressing, which defaults to ss (assumed to still be the program segment).
  push 0x8000 ; table: cos
  pop ds      ; ds = 0x8000
  push 0x7000 ; table: color_multiplier/cos
  pop fs      ; fs = 0x7000
  push 0x6000 ; table: 0.5 + cos
  pop gs      ; gs = 0x6000
  push 0xa000
  pop es      ; screen
  xor bp,bp   ; bp = 0 so [bp+LABEL] addresses LABEL via ss during table setup

; Build three tables of 16384 dword (float) entries each, indexed by the byte
; offset bx = angle*4:
;   ds:[bx] = cos(a)
;   gs:[bx] = cos(a) + 0.5
;   fs:[bx] = log2(10) / cos(a)
; where a = angle/2608 radians, and 2608 ~= 16384/(2*pi), so one period spans
; 16384 entries. The loop counter is the word at ss:[bp+di]; it runs through
; all 65536 values, so each slot is written four times (bx wraps at 16 bits)
; and the last write wins. The counter is back at 0 when the loop exits.
  fninit
COS_TAB:
  imul bx,[bp+di],4 ; bx = angle*4; angle lives at [ss:bp+di] (original note: [ss:-2], 0 on init - assumes di = -2 at entry)
  fild word[bp+di]  ; load angle (as a signed word)
  fidiv word[bp+C16K_DIV_2PI]
  fcos           ;; cos(angle/2608) ~= cos(angle/16384*2pi): adjust period to 2pi
  fst dword[bx]  ; ds table: cos (value stays on the FPU stack)

  fld st0
  fadd dword[bp+CHALF]
  fstp dword[gs:bx] ; gs table: cos + 0.5

  fldl2t          ; color multiplier: pushes log2(10) ~= 3.32
  fdivrp st1,st0 ;; 3.32/cos(...)  (st1 = st0/st1, then pop)
  fstp dword[fs:bx] ; fs table: log2(10)/cos; FPU stack is empty again
  inc word[bp+di] ; next angle
  jnz COS_TAB     ; loop until the counter wraps to 0 (bx = 0xFFFC from the last pass)

  ; Switch to the configured VESA mode (int 10h, ax=4f02h, bx=mode).
  ; The return status in ax is not checked.
  mov ax,0x4f02
  mov bx,VESA_MODE ; mode number from the XRES/YRES/VESA_MODE defines (16 bits per pixel)
  int 10h

  ; bp becomes the base register for all variable accesses below.
  ; The macros subtract 0x100+VESA_MODE, so this relies on bx still holding
  ; VESA_MODE after the BIOS call and on si being 0x100 here (si is never set
  ; in this file - presumably its program-entry value; TODO confirm).
  lea bp,[bx+si]
; b/w/d(xx): byte/word/dword access to label xx through bp (ss-relative) using
; a forced one-byte displacement, so every label used with these macros must
; lie within signed-byte range of bp.
%define b(xx) byte[byte bp+xx-0x100-VESA_MODE]
%define w(xx) word[byte bp+xx-0x100-VESA_MODE]
%define d(xx) dword[byte bp+xx-0x100-VESA_MODE]

; Start of one frame: advance the three time counters and compute the
; rotation/scale coefficients C and S used by the pixel loop.
; T, T2 and T3 are byte offsets into the dword tables, hence the *4 steps;
; they wrap naturally at 16 bits (one table period).
M:
; Alternative (faster) time steps, disabled:
;  add w(T),652*4
;  add w(T2),403*4  ; (sqrt(5) - 1) / 2
;  add w(T3),922*4  ; sqrt(2)

;  add w(T),163*4    ; 1/16 * 65536/2pi
;  add w(T2),113*4   ; 1/16 * 65536/2pi * 0.693147180559945309417232121458177 ln(2)
;  add w(T3),235*4   ; 1/16 * 65536/2pi * 1.44269504088896340735992468100189  1/ln(2)

  add w(T),41*4    ; 1/64 * 65536/2pi
  add w(T2),28*4   ; 1/64 * 65536/2pi * ln(2)
  add w(T3),59*4   ; 1/64 * 65536/2pi / ln(2)

  mov bx,w(T2)
  fld dword[bx]         ;; cos(t2)  (ds table)
  fidiv w(C10)          ;; cos(t2)/10
  fld1
  fsubrp st1,st0        ;; scale=1-cos(t2)/10

  mov bx,w(T)
  fld st0               ;; scale scale
  fmul dword[bx+0x4000] ;; S=cos(t + quarter period)*scale  scale
                        ;; (original note calls this sin(t); 0x4000 bytes = 4096
                        ;;  entries = 1/4 period, i.e. -sin(t) - sign only sets
                        ;;  the rotation direction)
  fstp d(S)
  fmul dword[bx]        ;; C=cos(t)*scale
  fstp d(C)             ; FPU stack is empty again


; Pixel loop
; X and Y step by 2 through the odd values -(RES-1) .. RES-1, giving XRES
; columns and YRES rows. es:di is the write position inside the current
; 64 KB video window; dx is the next window number to select.

  xor di,di
  xor dx,dx      ; dx:di = VESA window:address
  mov w(Y),-(YRES-1)
PIX_Y:
  mov w(X),-(XRES-1)
PIX_X:

; Set VESA window
; Done only when di == 0, i.e. at the start of the frame and each time di has
; wrapped after 64 KB of pixels (not once per scanline).
  test di,di
  jnz NZ_DI
  mov ax,0x4f05  ; VESA window control; assume 64kB granularity
  xor bx,bx      ; bh=0 bl=window=0 dx=page
  int 10h        ; clobbers ax (ax is reloaded below before it is used)
  inc dx         ; next window for the following wrap
NZ_DI:

  fldz
  fldz
  fldz       ; R=0 G=0 B=0

  fild w(Y)
  fidiv w(C_YRES) ;; y[-1..1] R G B

  fild w(X)
  fidiv w(C_YRES) ;; x[-XRES/YRES..XRES/YRES] y[-1..1] R G B  (divided by YRES too, keeps pixels square)

  stc             ; LEN needs CF=1 on entry (it toggles CF to run its square step twice)
  call LEN        ;; r x y R G B
  fmul d(CHALF)
  fistp dword[bp+si] ; d = length(x,y)/2 as cos_index, stored to scratch at ss:[bp+si]
  imul ax,[bp+si],4  ; ax = d*4 (byte offset); kept live through the whole I loop

  ; Iterated fold: for i = 12 down to 1, rotate/scale the point, fold it into
  ; a unit cell, and accumulate a colour contribution into R, G, B.
  ; FPU stack on entry and at the top of each iteration: x y R G B.
  ; ax = d*4 from the pixel setup must stay intact.
  ; cx is used as a whole by `imul bx,cx,...`; only cl is written here, so ch
  ; is assumed to be 0 (not set explicitly before the first pixel; after that
  ; `loop COL` leaves cx = 0).
  mov cl,12     ; cx = i
I:
; rotate and scale
  ;[x] = [C -S]*[x]
  ;[y]   [S  C] [y]
; The R body runs twice: the first `neg cl` makes cl negative (js taken), the
; second restores it (fall through). Stack notes: first pass | second pass.
R fld st1         ;; y x y R G B    | x Sy x Cy R G B
  fmul d(C)       ;; Cy x y R G B   | Cx Sy x Cy R G B
  fxch st2        ;; y x Cy R G B   | x Sy Cx Cy R G B
  fmul d(S)       ;; Sy x Cy R G B  | Sx Sy Cx Cy R G B
  neg cl
  js R
  faddp st3,st0  ;; Sy Cx Sx+Cy R G B
  fsubp st1,st0  ;; x=Cx-Sy y=Sx+Cy R G B

; square fold for now
; Same two-pass trick: each pass handles st0 and then swaps x and y, so after
; two passes both are folded and the order is restored.
F fsub d(CHALF) ;; x=x-0.5 y=y-0.5 R G B
  fist word[bp+si]   ; scratch = round(st0)
  fisub word[bp+si]  ; st0 -= round(st0)
  fxch st1
  neg cl          ; cl is nonzero here, so this also leaves CF=1 for LEN below
  js F          ;; x=x-round(x) y=y-round(y) R G B

; interfering concentric circles
  call LEN            ;; r x y R G B  (entered with CF=1 from the `neg cl` above)
  fistp word[bp+si]
  imul bx,[bp+si],5*4 ; 65536/2pi * (5*length(x,y)
  add bx,ax           ;              + d
  sub bx,w(T3)        ;              - t3)
  fld dword[fs:bx]    ;; k=log2(10)/cos(5*length(x,y) + d - t3) x y R G B  (fs table)
  fld st0
  fld st0             ;; k k k x y R G B

; RGB += k * ( 0.5 + cos(3*(t2 - d + i/100) + [0 -1 -2]) );
  imul bx,cx,26*4     ; bx = q = 65536/2pi * (i/100
  sub bx,ax           ;                       - d
  add bx,w(T2)        ;                       + t2
  imul bx,3           ;                      ) * 3

; The G body runs three times per iteration: 3*0x55 = 0xFF, so the third
; `add cl,0x55` carries out and leaves cl decremented by 1. Each pass consumes
; one k and adds to the next channel (st5 is R, then G, then B as the stack
; shrinks); bx steps back 2608 entries (1 radian) between channels.
;G fmul dword[gs:bx+2*2608*4] ;; k*(0.5 * cos(q)) k k x y R G B
G fmul dword[gs:bx] ;; k*(0.5+cos(q)) k k x y R G B
  faddp st5,st0     ;; k k x y R+=k*(0.5+cos(q)) G B
  sub bx,2608*4
  add cl,0x55       ;; x y [R G B]+=k*(0.5+cos(q+[0 -1 -2]))
  jnc G         ; cl--
  jnz I         ; ZF from the last add: repeat until i reaches 0

  ; Drop x and y (the comparison result is not used); stack is now R G B.
  fcompp

; RGB = RGB*RGB/256;  // square the sum for better contrast
; Each channel is squared, stored as a word, and its high byte (value/256)
; is clamped to 31 and shifted into ax. The unsigned compare also clamps the
; 0x8000 value that fistp stores when the result does not fit a word.

  mov cl,3         ; three channels; `loop` counts cx, so ch is assumed 0
COL:
  fmul st0         ; square the channel
  fistp word[bp+si]
  mov bx,[bp+si]   ; bh = channel^2 / 256
  cmp bh,31
  jb NO_CLAMP
  mov bh,31
NO_CLAMP:
  shl ax,5         ; make room for 5 bits (previous contents of ax shift out over the three passes)
  add al,bh        ; .rrr|rrgg|gggb|bbbb
  loop COL
  ; Convert 5-5-5 to the 5-6-5 layout: shift everything up one bit, then
  ; subtract blue (still in bh) so it drops back into bits 0-4, leaving the
  ; lowest green bit zero.
  shl ax,1         ; rrrr|rggg|ggbb|bbb.
  sub al,bh        ; rrrr|rggg|gg.b|bbbb

  stosw            ; write the pixel to es:di, di += 2

  ; Next column: X advances by 2 through odd values up to XRES-1.
  add w(X),2
  cmp w(X),XRES
  jl PIX_X      ; signed compare, X starts negative

  ; Next row: Y advances by 2 through odd values up to YRES-1.
  add w(Y),2
  cmp w(Y),YRES
  jl PIX_Y      ; signed compare, Y starts negative

  ; End of frame: poll the keyboard data port once and start the next frame
  ; unless the last byte read is 1 (ESC, per the original note).
  in al,60h ; ESC check
  cmp al,1
  jne M

  ; Restore text mode and leave. `ret` pops the word at the top of the stack,
  ; which COS_TAB used as its counter and left at 0 (presumably returning to
  ; offset 0 of the program segment - TODO confirm against the loader).
  mov ax,3 ; text mode
  int 10h
  ret

; LEN - push the scaled length of the point on top of the FPU stack.
; In:    FPU stack x y ... ; CF must be 1 (callers use `stc` or a preceding
;        `neg cl` on a nonzero cl). With CF=0 the square step would run only
;        once and the add below would overwrite x.
; Out:   FPU stack r x y ... with r = sqrt(x*x+y*y) * 2608 (2608 ~= 16384/2pi)
; Flags: CF=1 on return. No integer registers are modified.
LEN: ;; x y -> r=sqrt(x*x+y*y)*16384/2pi x y  ; requires cf=1 on entry
  fld st1          ; 1st pass: y x y        | 2nd pass: x y*y x y
  fmul st0         ; 1st pass: y*y x y      | 2nd pass: x*x y*y x y
  cmc              ; CF: 1 -> 0 after the 1st pass, 0 -> 1 after the 2nd
  jnc LEN          ; repeat once
  faddp st1,st0    ;; x*x+y*y x y
  fsqrt
  fimul w(C16K_DIV_2PI) ; scale radians-like length to table index units
  ret

; Constants and per-frame state. They are read through the bp-relative
; b()/w()/d() macros (one-byte displacement), and C16K_DIV_2PI/CHALF also via
; [bp+label] with bp=0 during table setup.
C10          dw 10   ; divisor for the scale wobble: scale = 1 - cos(t2)/10
C_YRES       dw YRES ; divisor that normalises both X and Y
C16K_DIV_2PI dw 2608 ; 16384/2pi

CHALF dd 0.5         ; single-precision 0.5


; Time counters, advanced once per frame at M. Each is a byte offset into the
; dword tables (always a multiple of 4) and wraps at 16 bits.
T dw 0
T2 dw 0
T3 dw 0

; Uninitialised variables; all are written before they are read in each frame.
section .bss

X resw 1 ; current pixel column coordinate, -(XRES-1)..XRES-1 in steps of 2
Y resw 1 ; current pixel row coordinate, -(YRES-1)..YRES-1 in steps of 2
C resd 1 ; float: cos(t)*scale, set at M
S resd 1 ; float: second rotation coefficient (cos table a quarter period ahead of t, times scale), set at M
